#delimit;
clear;
set more off;
capture log close;
set more off;

capture erase /data/doed_loan_raw.dta;

capture erase /data/doed_pell_raw.dta;

!st /data/doed_loan_raw.sas7bdat /data/doed_loan_raw.dta;

!st /data/doed_pell_raw.sas7bdat /data/doed_pell_raw.dta;

/*****
CHANGE PATH
*****/;
local out "/data";
local pathtab "/data";
local in "/data";
local pathtab2 "/data";

/*First, saving home state to then merge to state of enrollment*/
use "`pathtab2'/dataset_for_regs_new_RandR_homestate_noCB.dta", clear;

sort pid;

rename first_col_state_earl_new st_e_new;
keep pid st_e_new;
save "`out'/tomerge_homestate.dta", replace;

/*Now working on states of school enrollment*/
/*NSC*/
use "`out'/NSC_merged.dta", clear;

keep pid College_State Enrollment_Begin;
rename College_State state;

save "`pathtab'/OutofState_NSC.dta", replace;


/*NLSDS*/
use "`in'/doed_loan_raw.dta", clear;
/****************
I'm dropping the loans that were matched based on matching criteria=5
*****************/;

format dt %td;
format per_beg_dt %td;
format per_end_dt %td;
format loan_stat_dt %td;
format curr_mat_dt %td;
format curr_dis_dt %td;
format curr_can_dt %td;
format out_prin_bal_dt %td;

/*By this time, the oldest person in the sample was 15 years old, too young to be in college.
I do this because there are 54 cases that enrollment starts in 1900*/
gen end_year_1988=mdy(6,30,1988);
drop if per_beg_dt<end_year_1988;
drop end_year_1988;
/*163*/

drop if match_criteria==5;

/*Dropping loans that were cancelled*/
drop if tot_dis==0;

rename tuedt2003 pid;

gen loan_type_num=1 if loan_type=="D1";
replace loan_type_num=2 if loan_type=="D2";
replace loan_type_num=3 if loan_type=="D3";
replace loan_type_num=4 if loan_type=="D4";
replace loan_type_num=5 if loan_type=="D5";
replace loan_type_num=6 if loan_type=="D6";
replace loan_type_num=7 if loan_type=="D7";
replace loan_type_num=8 if loan_type=="EU";
replace loan_type_num=9 if loan_type=="FI";
replace loan_type_num=10 if loan_type=="IC";
replace loan_type_num=11 if loan_type=="GB";
replace loan_type_num=12 if loan_type=="NU";
replace loan_type_num=13 if loan_type=="PE";
replace loan_type_num=14 if loan_type=="PK";
replace loan_type_num=15 if loan_type=="PL";
replace loan_type_num=16 if loan_type=="SE";
replace loan_type_num=17 if loan_type=="SF";
replace loan_type_num=18 if loan_type=="SL";
replace loan_type_num=19 if loan_type=="SU";
replace loan_type_num=20 if loan_type=="SG";
replace loan_type_num=21 if loan_type=="CL";
replace loan_type_num=22 if loan_type=="PU";
replace loan_type_num=23 if loan_type=="DU";
replace loan_type_num=24 if loan_type=="RF";

label var loan_type_num "Type of loan (numeric var of loan_type)";
label define loan_type_lab 1 "1=Direct Staf Sub" 2 "2=Direct Staf Unsub" 3 "3=Direct Plus Grad" 4 "4=Direct plus"
	 5 "5=Direct cons Unsub" 6 "6=Direct cons sub" 7 "7=Direct plus cons" 8 "8=Perk Expanded Lending" 9 "9=Federalluy Insured (FISL)"
	 10 "10=ICL" 11 "11=FFEL PLUS Grad" 12 "12=NDSL" 13 "13=Fed Pell Grant" 14 "14=Fed Perkins Loan" 15 "15=FFEL PLUS Loan" 16 "16=FSEOG" 17 "17=FFEL Staf Sub" 18 "18=Supplemental Loan (SLS)"
	 19 "19=FFEL Staf Unsub" 20 "20=Nat'l Sci. & Math Access to retain talent grant" 21 "21=FFEL Cons Loan"  22 "22=Perk Loan"  23 "23=SNat'l Defense Loan"  24 "24=FFEL Refi Loan";
label values loan_type_num loan_type_lab;

/*Consolidated loan*/;
gen cons_loan=(loan_type_num==5 | loan_type_num==6 | loan_type_num==21);
/*Parent Plus-As difference from plus and plus grad*/;
gen plus_par_loan=(loan_type_num==4 | loan_type_num==15);

/*If the loan is a consolidated loan, per_beg_dt and per_end_dt has no meaning, and they almost always take the value 01jan2001. I'll replace them for missing*/
drop if cons_loan==1;
drop if plus_par_loan==1;

/*Making opeid compatible*/
gen opeid_1=substr(opeid,1,6);
gen opeid_2=substr(opeid,7,2);
drop opeid;
gen opeid=opeid_1+"-"+opeid_2 if opeid_1~="";
drop opeid_2;

/*Sorting by pid and per_beg_dt to know the time people enrolled for the first time*/
sort pid per_beg_dt;

/*Names are missleading, as first does not mean necessarily first, as I'm keeping all the colleges. I do it so I don't modify the code much*/
rename opeid first_col_opeid_nslds;
rename per_beg_dt first_col_enroll_nslds;


keep pid first_col*;
keep if first_col_enroll~=.;

gen year=year(first_col_enroll);
gen month=month(first_col_enroll);
gen year_enroll=year-1 if month<=8;
replace year_enroll=year if month>=9;

drop year month;
sort pid;

save "`pathtab'/temp_nslds_tomerge_first_col.dta", replace;

gen first_col_opeid6_nslds=substr(first_col_opeid_nslds,1,6);

replace year_enroll=1995 if year_enroll<1995;
sort first_col_opeid_nslds year_enroll;
merge m:1 first_col_opeid_nslds year_enroll using "`pathtab'/ipeds_state.dta";
keep if _merge==3;
drop idx _merge unitid first_col_opeid6_nslds;

save "`pathtab'/ipeds_state_merge_opeid8_temp.dta", replace;

use "`pathtab'/temp_nslds_tomerge_first_col.dta", clear;
gen first_col_opeid6_nslds=substr(first_col_opeid_nslds,1,6);

replace year_enroll=1995 if year_enroll<1995;
sort first_col_opeid_nslds year_enroll;
merge m:1 first_col_opeid_nslds year_enroll using "`pathtab'/ipeds_state.dta";
keep if _merge==1;
drop _merge;
drop first_col_state first_col_state_fips first_col_sector;

merge m:1 first_col_opeid6_nslds year_enroll using "`pathtab'/ipeds_state_opeid6.dta";
keep if _merge==3;
drop _merge;

append using "`pathtab'/ipeds_state_merge_opeid8_temp.dta";

drop idx unitid first_col_opeid6_nslds;
sort pid;

save "`pathtab'/ipeds_state_merge_opeid8_temp.dta", replace;

use "`pathtab'/temp_nslds_tomerge_first_col.dta", clear;
gen first_col_opeid6_nslds=substr(first_col_opeid_nslds,1,6);

replace year_enroll=1995 if year_enroll<1995;
sort first_col_opeid_nslds year_enroll;
merge m:1 first_col_opeid_nslds year_enroll using "`pathtab'/ipeds_state.dta";
keep if _merge==1;
drop _merge;
drop first_col_instnm_nslds first_col_state first_col_state_fips first_col_sector;

merge m:1 first_col_opeid6_nslds year_enroll using "`pathtab'/ipeds_state_opeid6.dta";
keep if _merge==1;
drop _merge;
drop idx unitid first_col_opeid6_nslds;
drop first_col_state first_col_state_fips first_col_sector;

merge m:1 first_col_opeid_nslds using "`pathtab'/ipeds_state_noyear.dta";
keep if _merge==3;
drop _merge;

sort pid;
count if pid==pid[_n-1];

append using "`pathtab'/ipeds_state_merge_opeid8_temp.dta";

save "`pathtab'/ipeds_state_merge_opeid8_temp.dta", replace;


use "`pathtab'/temp_nslds_tomerge_first_col.dta", clear;
gen first_col_opeid6_nslds=substr(first_col_opeid_nslds,1,6);

replace year_enroll=1995 if year_enroll<1995;
sort first_col_opeid_nslds year_enroll;
merge m:1 first_col_opeid_nslds year_enroll using "`pathtab'/ipeds_state.dta";
keep if _merge==1;
drop _merge;
drop first_col_instnm_nslds first_col_state first_col_state_fips first_col_sector;

merge m:1 first_col_opeid6_nslds year_enroll using "`pathtab'/ipeds_state_opeid6.dta";
keep if _merge==1;
drop _merge;
drop idx unitid first_col_opeid6_nslds;
drop first_col_state first_col_state_fips first_col_sector;

merge m:1 first_col_opeid_nslds using "`pathtab'/ipeds_state_noyear.dta";
keep if _merge==1;
drop _merge;
drop first_col_state first_col_state_fips first_col_sector;

rename first_col_opeid_nslds opeid;
merge m:1 opeid using "`pathtab'/opeid_NSC.dta";
keep if _merge~=2;
drop _merge;

sort pid;
count if pid==pid[_n-1];

rename opeid first_col_opeid_nslds;
rename College_State first_col_state;

append using "`pathtab'/ipeds_state_merge_opeid8_temp.dta";
drop year_enroll;

rename first_col_state first_col_state_nslds;
rename first_col_state_fips first_col_state_fips_nslds;
rename first_col_sector first_col_sector_nslds;

sort pid;
save "`pathtab'/OutofState_NSLDS.dta", replace;



/*Pell Grants*/
use "`in'/doed_pell_raw.dta", clear;

format dis_dt %td;
label var dis_dt "last disbursement date";

label define match_criteria 1 "1=SSN, first 3 letters, and DOB" 2 "2=SSN and DOB" 3 "3=SSN and fisrt 3 letters" 4 "4=SSN only" 5 "5=First 3 letters and DOB";
label values match_criteria match_criteria;
label var match_criteria "how the information was matched";
drop if match_criteria==5;

/*I will drop if type==SG*/;
drop if type=="SG";
drop type;

/*I will drop those cases where the amount paid to date is 0*/
drop if amt_paid_to_dt==0;

rename tuedt2003 pid;

sort pid award_yr dis_dt;


/*Making opeid compatible*/
gen opeid_1=substr(att_opeid,1,6);
gen opeid_2=substr(att_opeid,7,2);
gen opeid=opeid_1+"-"+opeid_2 if opeid_1~="";
drop opeid_2;

keep pid award_yr opeid_1 opeid;
rename opeid first_col_opeid_nslds;
rename opeid_1 first_col_opeid6_nslds;

sort pid;

gen year_enroll=award_yr;
replace year_enroll=1995 if award_yr<1995;

save "`pathtab'/temp_pell_tomerge_first_col.dta", replace;

merge m:1 first_col_opeid_nslds year_enroll using "`pathtab'/ipeds_state.dta";
keep if _merge==3;
drop idx _merge unitid;

save "`pathtab'/ipeds_state_merge_opeid8_pell_temp.dta", replace;

use "`pathtab'/temp_pell_tomerge_first_col.dta", clear;
sort first_col_opeid_nslds year_enroll;
merge m:1 first_col_opeid_nslds year_enroll using "`pathtab'/ipeds_state.dta";
keep if _merge==1;
drop _merge;
drop first_col_state first_col_state_fips first_col_sector;

merge m:1 first_col_opeid6_nslds year_enroll using "`pathtab'/ipeds_state_opeid6.dta";
keep if _merge==3;
drop _merge;

drop idx unitid first_col_opeid6_nslds;

append using "`pathtab'/ipeds_state_merge_opeid8_pell_temp.dta";

sort pid award_yr;

save "`pathtab'/ipeds_state_merge_opeid8_pell_temp.dta", replace;

use "`pathtab'/temp_pell_tomerge_first_col.dta", clear;
sort first_col_opeid_nslds year_enroll;
merge m:1 first_col_opeid_nslds year_enroll using "`pathtab'/ipeds_state.dta";
keep if _merge==1;
drop _merge;
drop first_col_state first_col_state_fips first_col_sector;

merge m:1 first_col_opeid6_nslds year_enroll using "`pathtab'/ipeds_state_opeid6.dta";
keep if _merge==1;
drop _merge;

drop idx unitid first_col_opeid6_nslds;
drop first_col_state first_col_state_fips first_col_sector;

merge m:1 first_col_opeid_nslds using "`pathtab'/ipeds_state_noyear.dta";
keep if _merge==3;
drop _merge;

append using "`pathtab'/ipeds_state_merge_opeid8_pell_temp.dta";

sort pid award_yr;

save "`pathtab'/ipeds_state_merge_opeid8_pell_temp.dta", replace;


use "`pathtab'/temp_pell_tomerge_first_col.dta", clear;
sort first_col_opeid_nslds year_enroll;
merge m:1 first_col_opeid_nslds year_enroll using "`pathtab'/ipeds_state.dta";
keep if _merge==1;
drop _merge;
drop first_col_state first_col_state_fips first_col_sector;

merge m:1 first_col_opeid6_nslds year_enroll using "`pathtab'/ipeds_state_opeid6.dta";
keep if _merge==1;
drop _merge;

drop idx unitid first_col_opeid6_nslds;
drop first_col_state first_col_state_fips first_col_sector;

merge m:1 first_col_opeid_nslds using "`pathtab'/ipeds_state_noyear.dta";
keep if _merge==1;
drop _merge;

drop first_col_state first_col_state_fips first_col_sector;

rename first_col_opeid_nslds opeid;
merge m:1 opeid using "`pathtab'/opeid_NSC.dta";
keep if _merge~=2;
drop _merge;

sort pid;

rename opeid first_col_opeid_nslds;
rename College_State first_col_state;

append using "`pathtab'/ipeds_state_merge_opeid8_pell_temp.dta";
drop year_enroll;

rename first_col_state first_col_state_pell;
rename first_col_state_fips first_col_state_fips_pell;
rename first_col_sector first_col_sector_pell;
rename first_col_opeid_nslds first_col_opeid_pell;
sort pid award_yr;
drop first_col_opeid6_nslds;

/*I will assume that if first_pell_year is, for example, 2003, enrollment started in september
of that year*/
gen first_pell_year_mdy=mdy(9,30,award_yr);
format %td first_pell_year_mdy;


save "`pathtab'/OutofState_Pell.dta", replace;

append using "`pathtab'/OutofState_NSC.dta";
append using "`pathtab'/OutofState_NSLDS.dta";

drop award_yr first_col_opeid_pell first_col_instnm_nslds first_col_sector_pell first_col_opeid_nslds first_col_sector_nslds;

#delimit;
gen enroll_b=first_pell_year_mdy;
replace enroll_b=Enrollment_Begin if Enrollment_Begin~=.;
replace enroll_b=first_col_enroll_nslds if first_col_enroll_nslds~=.;
format %td  enroll_b;

drop first_pell_year_mdy Enrollment_Begin first_col_enroll_nslds;

replace state=first_col_state_pell if first_col_state_pell~="";
replace state=first_col_state_nslds if first_col_state_nslds~="";

gen state_n=.;
replace state_n=first_col_state_fips_pell if first_col_state_fips_pell~=.;
replace state_n=first_col_state_fips_nslds if first_col_state_fips_nslds~=.;

replace state_n=2 if state=="AK" & state_n==.;
replace state_n=1 if state=="AL" & state_n==.;
replace state_n=5 if state=="AR" & state_n==.;
replace state_n=4 if state=="AZ" & state_n==.;
replace state_n=6 if state=="CA" & state_n==.;
replace state_n=8 if state=="CO" & state_n==.;
replace state_n=9 if state=="CT" & state_n==.;
replace state_n=11 if state=="DC" & state_n==.;
replace state_n=10 if state=="DE" & state_n==.;
replace state_n=12 if state=="FL" & state_n==.;
replace state_n=13 if state=="GA" & state_n==.;
replace state_n=15 if state=="HI" & state_n==.;
replace state_n=19 if state=="IA" & state_n==.;
replace state_n=16 if state=="ID" & state_n==.;
replace state_n=17 if state=="IL" & state_n==.;
replace state_n=18 if state=="IN" & state_n==.;
replace state_n=20 if state=="KS" & state_n==.;
replace state_n=21 if state=="KY" & state_n==.;
replace state_n=22 if state=="LA" & state_n==.;
replace state_n=25 if state=="MA" & state_n==.;
replace state_n=24 if state=="MD" & state_n==.;
replace state_n=23 if state=="ME" & state_n==.;
replace state_n=26 if state=="MI" & state_n==.;
replace state_n=27 if state=="MN" & state_n==.;
replace state_n=28 if state=="MS" & state_n==.;
replace state_n=29 if state=="MO" & state_n==.;
replace state_n=30 if state=="MT" & state_n==.;
replace state_n=37 if state=="NC" & state_n==.;
replace state_n=38 if state=="ND" & state_n==.;
replace state_n=31 if state=="NE" & state_n==.;
replace state_n=32 if state=="NV" & state_n==.;
replace state_n=33 if state=="NH" & state_n==.;
replace state_n=34 if state=="NJ" & state_n==.;
replace state_n=35 if state=="NM" & state_n==.;
replace state_n=36 if state=="NY" & state_n==.;
replace state_n=39 if state=="OH" & state_n==.;
replace state_n=40 if state=="OK" & state_n==.;
replace state_n=41 if state=="OR" & state_n==.;
replace state_n=42 if state=="PA" & state_n==.;
replace state_n=72 if state=="PR" & state_n==.;
replace state_n=44 if state=="RI" & state_n==.;
replace state_n=45 if state=="SC" & state_n==.;
replace state_n=46 if state=="SD" & state_n==.;
replace state_n=47 if state=="TN" & state_n==.;
replace state_n=48 if state=="TX" & state_n==.;
replace state_n=49 if state=="UT" & state_n==.;
replace state_n=51 if state=="VA" & state_n==.;
replace state_n=50 if state=="VT" & state_n==.;
replace state_n=53 if state=="WA" & state_n==.;
replace state_n=54 if state=="WV" & state_n==.;
replace state_n=55 if state=="WI" & state_n==.;
replace state_n=56 if state=="WY" & state_n==.;
replace state_n=99 if state=="FO" & state_n==.; /*Foreing Schools*/

drop if state=="";

drop first_col_state*;

/*******Age Variables********/;
/*I merge because I only care about school enrollment before age 23*/
merge m:1 pid using "`pathtab'/age_tomerge.dta";
drop if _m==2;
drop _merge;

drop age_2004 dob_*;

gen age_enroll_b=(enroll_b-dob)/365;
drop if age_enroll_b>=23;

merge m:1 pid using "`out'/tomerge_homestate.dta";

gen count=1 if state~=st_e_new & _m==3;
sort pid enroll_b;
by pid: egen outofstate=max(count);
/*Now creating one for first college out of state*/
gen count2=1 if state~=st_e_new & _m==3 & (_n==1 | pid~=pid[_n-1]);
by pid: egen outofstate_first=max(count2);
replace outofstate=0 if outofstate==.;
replace outofstate_first=0 if outofstate_first==.;

by pid: keep if _n==1;

keep pid outof*;

save "`pathtab2'/OutofState_indicator.dta", replace;


erase "`pathtab'/ipeds_state_merge_opeid8_temp.dta";
erase "`pathtab'/ipeds_state_merge_opeid8_pell_temp.dta";
erase "`out'/tomerge_homestate.dta";
